Automate emotion analysis of textual comments and feedback¶

Importing necessary packages¶

In [1]:
!pip install nlp
!pip install datasets
import tensorflow as tf
import numpy as np
import pandas as pd
from wordcloud import WordCloud
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import nlp
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.models import Sequential
from keras.layers import Embedding
from keras.layers import Flatten
from keras.layers import Bidirectional
from keras.callbacks import EarlyStopping
from keras.layers import GlobalAvgPool1D
import random
Requirement already satisfied: nlp in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (0.4.0)
Requirement already satisfied: numpy in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from nlp) (1.26.2)
Requirement already satisfied: pyarrow>=0.16.0 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from nlp) (16.0.0)
Requirement already satisfied: dill in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from nlp) (0.3.8)
Requirement already satisfied: pandas in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from nlp) (2.2.2)
Requirement already satisfied: requests>=2.19.0 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from nlp) (2.31.0)
Requirement already satisfied: tqdm>=4.27 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from nlp) (4.66.2)
Requirement already satisfied: filelock in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from nlp) (3.14.0)
Requirement already satisfied: xxhash in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from nlp) (3.4.1)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from requests>=2.19.0->nlp) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from requests>=2.19.0->nlp) (3.6)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from requests>=2.19.0->nlp) (2.1.0)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from requests>=2.19.0->nlp) (2023.11.17)
Requirement already satisfied: colorama in c:\users\mayank\appdata\roaming\python\python312\site-packages (from tqdm>=4.27->nlp) (0.4.6)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\mayank\appdata\roaming\python\python312\site-packages (from pandas->nlp) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from pandas->nlp) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from pandas->nlp) (2024.1)
Requirement already satisfied: six>=1.5 in c:\users\mayank\appdata\roaming\python\python312\site-packages (from python-dateutil>=2.8.2->pandas->nlp) (1.16.0)
Requirement already satisfied: datasets in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (2.19.0)
Requirement already satisfied: filelock in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from datasets) (3.14.0)
Requirement already satisfied: numpy>=1.17 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from datasets) (1.26.2)
Requirement already satisfied: pyarrow>=12.0.0 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from datasets) (16.0.0)
Requirement already satisfied: pyarrow-hotfix in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from datasets) (0.6)
Requirement already satisfied: dill<0.3.9,>=0.3.0 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from datasets) (0.3.8)
Requirement already satisfied: pandas in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from datasets) (2.2.2)
Requirement already satisfied: requests>=2.19.0 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from datasets) (2.31.0)
Requirement already satisfied: tqdm>=4.62.1 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from datasets) (4.66.2)
Requirement already satisfied: xxhash in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from datasets) (3.4.1)
Requirement already satisfied: multiprocess in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from datasets) (0.70.16)
Requirement already satisfied: fsspec<=2024.3.1,>=2023.1.0 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from fsspec[http]<=2024.3.1,>=2023.1.0->datasets) (2024.3.1)
Requirement already satisfied: aiohttp in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from datasets) (3.9.5)
Requirement already satisfied: huggingface-hub>=0.21.2 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from datasets) (0.23.0)
Requirement already satisfied: packaging in c:\users\mayank\appdata\roaming\python\python312\site-packages (from datasets) (23.2)
Requirement already satisfied: pyyaml>=5.1 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from datasets) (6.0.1)
Requirement already satisfied: aiosignal>=1.1.2 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from aiohttp->datasets) (1.3.1)
Requirement already satisfied: attrs>=17.3.0 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from aiohttp->datasets) (23.1.0)
Requirement already satisfied: frozenlist>=1.1.1 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from aiohttp->datasets) (1.4.1)
Requirement already satisfied: multidict<7.0,>=4.5 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from aiohttp->datasets) (6.0.5)
Requirement already satisfied: yarl<2.0,>=1.0 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from aiohttp->datasets) (1.9.4)
Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from huggingface-hub>=0.21.2->datasets) (4.11.0)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from requests>=2.19.0->datasets) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from requests>=2.19.0->datasets) (3.6)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from requests>=2.19.0->datasets) (2.1.0)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from requests>=2.19.0->datasets) (2023.11.17)
Requirement already satisfied: colorama in c:\users\mayank\appdata\roaming\python\python312\site-packages (from tqdm>=4.62.1->datasets) (0.4.6)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\mayank\appdata\roaming\python\python312\site-packages (from pandas->datasets) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from pandas->datasets) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from pandas->datasets) (2024.1)
Requirement already satisfied: six>=1.5 in c:\users\mayank\appdata\roaming\python\python312\site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)

The datasets library from Hugging Face provides easy access to a wide range of datasets that are commonly used for NLP tasks.¶

It offers functionalities to download, preprocess, and work with datasets seamlessly, allowing users to load datasets easily into their machine learning or deep learning pipelines.¶

In [2]:
pip install dataset
Requirement already satisfied: dataset in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (1.6.2)
Requirement already satisfied: sqlalchemy<2.0.0,>=1.3.2 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from dataset) (1.4.52)
Requirement already satisfied: alembic>=0.6.2 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from dataset) (1.13.1)
Requirement already satisfied: banal>=1.0.1 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from dataset) (1.0.6)
Requirement already satisfied: Mako in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from alembic>=0.6.2->dataset) (1.3.3)
Requirement already satisfied: typing-extensions>=4 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from alembic>=0.6.2->dataset) (4.11.0)
Requirement already satisfied: greenlet!=0.4.17 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from sqlalchemy<2.0.0,>=1.3.2->dataset) (3.0.3)
Requirement already satisfied: MarkupSafe>=0.9.2 in c:\users\mayank\appdata\local\programs\python\python312\lib\site-packages (from Mako->alembic>=0.6.2->dataset) (2.1.3)
Note: you may need to restart the kernel to use updated packages.

Importing the Dataset¶

In [3]:
from datasets import load_dataset
data = load_dataset('emotion')
C:\Users\Mayank\AppData\Local\Programs\Python\Python312\Lib\site-packages\datasets\load.py:1486: FutureWarning: The repository for emotion contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/emotion
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
  warnings.warn(
In [4]:
# Converting the train, validation and test datasets into DataFrame format
train = pd.DataFrame(data['train'])
validation = pd.DataFrame(data['validation'])
test = pd.DataFrame(data['test'])
In [5]:
train.head(10)
Out[5]:
text label
0 i didnt feel humiliated 0
1 i can go from feeling so hopeless to so damned... 0
2 im grabbing a minute to post i feel greedy wrong 3
3 i am ever feeling nostalgic about the fireplac... 2
4 i am feeling grouchy 3
5 ive been feeling a little burdened lately wasn... 0
6 ive been taking or milligrams or times recomme... 5
7 i feel as confused about life as a teenager or... 4
8 i have been with petronas for years i feel tha... 1
9 i feel romantic too 2
In [6]:
train['label'].unique()
Out[6]:
array([0, 3, 2, 5, 4, 1], dtype=int64)

Distribution of the Length of the Texts¶

In [7]:
train['length_of_text'] = [len(i.split(' ')) for i in train['text']]

fig = px.histogram(train['length_of_text'], marginal='box',
                   labels={"value": "Length of the Text"})

fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(title_text='Distribution of the Length of the Texts',
                  title_x=0.5, title_font=dict(size=22))
fig.show()

Distribution of the Length of the Texts by Emotions¶

In [8]:
fig = px.histogram(train['length_of_text'], marginal='box',
                   labels={"value": "Length of the Text"},
                   color=train['label'])
fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(title_text='Distribution of the Length of the Texts by Emotions',
                  title_x=0.5, title_font=dict(size=22))
fig.show()

Distribution of the Labels¶

In [9]:
fig = px.histogram(train, x='label', color='label')
fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(title_text='Distribution of the Labels',
                  title_x=0.5, title_font=dict(size=22))
fig.show()
In [10]:
FreqOfWords = train['text'].str.split(expand=True).stack().value_counts()
FreqOfWords_top200 = FreqOfWords[:200]

top_words_df = pd.DataFrame({'word': FreqOfWords_top200.index, 'frequency': FreqOfWords_top200.values})

import plotly.express as px

fig = px.treemap(top_words_df, path=['word'], values='frequency')
fig.update_layout(title_text='Frequency of the Words in the Train Dataset', title_x=0.5, title_font=dict(size=22))
fig.update_traces(textinfo="label+value")
fig.show()

Tokenizing with NLTK¶

In [11]:
import nltk
nltk.download('punkt')
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Mayank\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Out[11]:
True
In [12]:
def tokenization(inputs):
    return word_tokenize(inputs) #REFERENCE[1]


train['text_tokenized'] = train['text'].apply(tokenization)
validation['text_tokenized'] = validation['text'].apply(tokenization)
In [13]:
train.head()
Out[13]:
text label length_of_text text_tokenized
0 i didnt feel humiliated 0 4 [i, didnt, feel, humiliated]
1 i can go from feeling so hopeless to so damned... 0 21 [i, can, go, from, feeling, so, hopeless, to, ...
2 im grabbing a minute to post i feel greedy wrong 3 10 [im, grabbing, a, minute, to, post, i, feel, g...
3 i am ever feeling nostalgic about the fireplac... 2 18 [i, am, ever, feeling, nostalgic, about, the, ...
4 i am feeling grouchy 3 4 [i, am, feeling, grouchy]

Stopwords Removal¶

In [14]:
import nltk
nltk.download('stopwords')
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Mayank\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
Out[14]:
True
In [15]:
stop_words = set(stopwords.words('english'))

def stopwords_remove(inputs):
    return [item for item in inputs if item not in stop_words]

train['text_stop'] = train['text_tokenized'].apply(stopwords_remove)
validation['text_stop'] = validation['text_tokenized'].apply(stopwords_remove)

train.head()
Out[15]:
text label length_of_text text_tokenized text_stop
0 i didnt feel humiliated 0 4 [i, didnt, feel, humiliated] [didnt, feel, humiliated]
1 i can go from feeling so hopeless to so damned... 0 21 [i, can, go, from, feeling, so, hopeless, to, ... [go, feeling, hopeless, damned, hopeful, aroun...
2 im grabbing a minute to post i feel greedy wrong 3 10 [im, grabbing, a, minute, to, post, i, feel, g... [im, grabbing, minute, post, feel, greedy, wrong]
3 i am ever feeling nostalgic about the fireplac... 2 18 [i, am, ever, feeling, nostalgic, about, the, ... [ever, feeling, nostalgic, fireplace, know, st...
4 i am feeling grouchy 3 4 [i, am, feeling, grouchy] [feeling, grouchy]

Lemmatization¶

In [16]:
import nltk
nltk.download('wordnet')
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Mayank\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Out[16]:
True
In [17]:
lemmatizer = WordNetLemmatizer()

def lemmatization(inputs):
    return [lemmatizer.lemmatize(word=x, pos='v') for x in inputs]

train['text_lemmatized'] = train['text_stop'].apply(lemmatization)
validation['text_lemmatized'] = validation['text_stop'].apply(lemmatization)

train.head()
Out[17]:
text label length_of_text text_tokenized text_stop text_lemmatized
0 i didnt feel humiliated 0 4 [i, didnt, feel, humiliated] [didnt, feel, humiliated] [didnt, feel, humiliate]
1 i can go from feeling so hopeless to so damned... 0 21 [i, can, go, from, feeling, so, hopeless, to, ... [go, feeling, hopeless, damned, hopeful, aroun... [go, feel, hopeless, damn, hopeful, around, so...
2 im grabbing a minute to post i feel greedy wrong 3 10 [im, grabbing, a, minute, to, post, i, feel, g... [im, grabbing, minute, post, feel, greedy, wrong] [im, grab, minute, post, feel, greedy, wrong]
3 i am ever feeling nostalgic about the fireplac... 2 18 [i, am, ever, feeling, nostalgic, about, the, ... [ever, feeling, nostalgic, fireplace, know, st... [ever, feel, nostalgic, fireplace, know, still...
4 i am feeling grouchy 3 4 [i, am, feeling, grouchy] [feeling, grouchy] [feel, grouchy]

Joining Tokens into Sentences¶

In [18]:
train['text_cleaned'] = train['text_lemmatized'].str.join(' ')
validation['text_cleaned'] = validation['text_lemmatized'].str.join(' ')

train.head() # Final form of the dataset
Out[18]:
text label length_of_text text_tokenized text_stop text_lemmatized text_cleaned
0 i didnt feel humiliated 0 4 [i, didnt, feel, humiliated] [didnt, feel, humiliated] [didnt, feel, humiliate] didnt feel humiliate
1 i can go from feeling so hopeless to so damned... 0 21 [i, can, go, from, feeling, so, hopeless, to, ... [go, feeling, hopeless, damned, hopeful, aroun... [go, feel, hopeless, damn, hopeful, around, so... go feel hopeless damn hopeful around someone c...
2 im grabbing a minute to post i feel greedy wrong 3 10 [im, grabbing, a, minute, to, post, i, feel, g... [im, grabbing, minute, post, feel, greedy, wrong] [im, grab, minute, post, feel, greedy, wrong] im grab minute post feel greedy wrong
3 i am ever feeling nostalgic about the fireplac... 2 18 [i, am, ever, feeling, nostalgic, about, the, ... [ever, feeling, nostalgic, fireplace, know, st... [ever, feel, nostalgic, fireplace, know, still... ever feel nostalgic fireplace know still property
4 i am feeling grouchy 3 4 [i, am, feeling, grouchy] [feeling, grouchy] [feel, grouchy] feel grouchy

WordCloud of the Cleaned Dataset¶

In [19]:
WordCloud = WordCloud(max_words=100,
                      random_state=30,
                      collocations=True).generate(str((train['text_cleaned'])))

plt.figure(figsize=(15, 8))
plt.imshow(WordCloud, interpolation='bilinear')
plt.axis("off")
plt.show()
No description has been provided for this image

¶

Tokenizing with Tensorflow

In [20]:
num_words = 10000
tokenizer = Tokenizer(num_words=num_words, oov_token='<OOV>')
tokenizer.fit_on_texts(train['text_cleaned'])

word_index = tokenizer.word_index
In [21]:
num_words = 10000
tokenizer = Tokenizer(num_words=num_words, oov_token='<OOV>')
tokenizer.fit_on_texts(train['text_cleaned'])

word_index = tokenizer.word_index
In [22]:
Tokenized_train = tokenizer.texts_to_sequences(train['text_cleaned'])
Tokenized_val = tokenizer.texts_to_sequences(validation['text_cleaned'])
In [23]:
print('Non-tokenized Version: ', train['text_cleaned'][0])
print('Tokenized Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][0]]))
print('--'*50)
print('Non-tokenized Version: ', train['text_cleaned'][10])
print('Tokenized Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][10]]))
print('--'*50)
print('Non-tokenized Version: ', train['text'][100])
print('Tokenized Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][100]]))
Non-tokenized Version:  didnt feel humiliate
Tokenized Version:  [[56, 2, 559]]
----------------------------------------------------------------------------------------------------
Non-tokenized Version:  feel like make suffer see mean something
Tokenized Version:  [[2, 3, 6, 393, 31, 102, 25]]
----------------------------------------------------------------------------------------------------
Non-tokenized Version:  i wont let me child cry it out because i feel that loving her and lily when she was little was going to be opportunities that only lasted for those short few months
Tokenized Version:  [[356, 82, 289, 230, 2, 14, 3422, 13, 7, 2310, 87, 742, 263]]

Padding¶

In [24]:
maxlen = 40
Padded_train = pad_sequences(Tokenized_train, maxlen=maxlen, padding='pre')
Padded_val = pad_sequences(Tokenized_val, maxlen=maxlen, padding='pre')

print('Non-padded Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][0]]))
print('Padded Version: ', Padded_train[0])
print('--'*50)
print('Non-padded Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][10]]))
print('Padded Version: ', Padded_train[10])
Non-padded Version:  [[56, 2, 559]]
Padded Version:  [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0  56   2 559]
----------------------------------------------------------------------------------------------------
Non-padded Version:  [[2, 3, 6, 393, 31, 102, 25]]
Padded Version:  [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   2   3   6
 393  31 102  25]

Creating the Model¶

In [26]:
model = Sequential()

model.add(Embedding(num_words, 16))
model.add(GlobalAvgPool1D())

tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(50, return_sequences=True, activation='relu'))
model.add(Dropout(0.3))

tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(40, activation='relu', return_sequences=True))
model.add(Dropout(0.3))

tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(40, activation='relu'))
model.add(Dropout(0.3))

model.add(Dense(6, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ embedding_1 (Embedding)              │ ?                           │     0 (unbuilt) │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ global_average_pooling1d_1           │ ?                           │     0 (unbuilt) │
│ (GlobalAveragePooling1D)             │                             │                 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_3 (Dropout)                  │ ?                           │     0 (unbuilt) │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_4 (Dropout)                  │ ?                           │     0 (unbuilt) │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_5 (Dropout)                  │ ?                           │     0 (unbuilt) │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_1 (Dense)                      │ ?                           │     0 (unbuilt) │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 0 (0.00 B)
 Trainable params: 0 (0.00 B)
 Non-trainable params: 0 (0.00 B)

Training the Model¶

In [27]:
label_ = {"sadness": 0, "joy": 1, "love": 2, "anger": 3, "fear": 4, "surprise": 5}
train['label'] = train['label'].replace(label_)
validation['label'] = validation['label'].replace(label_)

train.head()
Out[27]:
text label length_of_text text_tokenized text_stop text_lemmatized text_cleaned
0 i didnt feel humiliated 0 4 [i, didnt, feel, humiliated] [didnt, feel, humiliated] [didnt, feel, humiliate] didnt feel humiliate
1 i can go from feeling so hopeless to so damned... 0 21 [i, can, go, from, feeling, so, hopeless, to, ... [go, feeling, hopeless, damned, hopeful, aroun... [go, feel, hopeless, damn, hopeful, around, so... go feel hopeless damn hopeful around someone c...
2 im grabbing a minute to post i feel greedy wrong 3 10 [im, grabbing, a, minute, to, post, i, feel, g... [im, grabbing, minute, post, feel, greedy, wrong] [im, grab, minute, post, feel, greedy, wrong] im grab minute post feel greedy wrong
3 i am ever feeling nostalgic about the fireplac... 2 18 [i, am, ever, feeling, nostalgic, about, the, ... [ever, feeling, nostalgic, fireplace, know, st... [ever, feel, nostalgic, fireplace, know, still... ever feel nostalgic fireplace know still property
4 i am feeling grouchy 3 4 [i, am, feeling, grouchy] [feeling, grouchy] [feel, grouchy] feel grouchy
In [28]:
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='auto', patience=5,
                                                 restore_best_weights=True)

epochs = 100
hist = model.fit(Padded_train, train['label'], epochs=epochs,
                 validation_data=(Padded_val, validation['label']),
                 callbacks=[early_stopping])
Epoch 1/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - accuracy: 0.3058 - loss: 1.6453 - val_accuracy: 0.3520 - val_loss: 1.5789
Epoch 2/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.3328 - loss: 1.5858 - val_accuracy: 0.3520 - val_loss: 1.5693
Epoch 3/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.3493 - loss: 1.5715 - val_accuracy: 0.3520 - val_loss: 1.5570
Epoch 4/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.3557 - loss: 1.5604 - val_accuracy: 0.3520 - val_loss: 1.5407
Epoch 5/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.3703 - loss: 1.5371 - val_accuracy: 0.3540 - val_loss: 1.5132
Epoch 6/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.4169 - loss: 1.4933 - val_accuracy: 0.3655 - val_loss: 1.4713
Epoch 7/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.4509 - loss: 1.4388 - val_accuracy: 0.4920 - val_loss: 1.4138
Epoch 8/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.4872 - loss: 1.3717 - val_accuracy: 0.6000 - val_loss: 1.3438
Epoch 9/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.5338 - loss: 1.2959 - val_accuracy: 0.5860 - val_loss: 1.2628
Epoch 10/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.5735 - loss: 1.2076 - val_accuracy: 0.6550 - val_loss: 1.1799
Epoch 11/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 4ms/step - accuracy: 0.6243 - loss: 1.1263 - val_accuracy: 0.6765 - val_loss: 1.1032
Epoch 12/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.6604 - loss: 1.0501 - val_accuracy: 0.7540 - val_loss: 1.0271
Epoch 13/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.6977 - loss: 0.9732 - val_accuracy: 0.7620 - val_loss: 0.9473
Epoch 14/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7296 - loss: 0.9100 - val_accuracy: 0.7995 - val_loss: 0.8737
Epoch 15/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7511 - loss: 0.8390 - val_accuracy: 0.8315 - val_loss: 0.8230
Epoch 16/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7692 - loss: 0.8013 - val_accuracy: 0.8265 - val_loss: 0.7579
Epoch 17/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7878 - loss: 0.7368 - val_accuracy: 0.8360 - val_loss: 0.7103
Epoch 18/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7981 - loss: 0.6935 - val_accuracy: 0.8425 - val_loss: 0.6640
Epoch 19/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8067 - loss: 0.6642 - val_accuracy: 0.8410 - val_loss: 0.6254
Epoch 20/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.8110 - loss: 0.6326 - val_accuracy: 0.8425 - val_loss: 0.5933
Epoch 21/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8297 - loss: 0.5910 - val_accuracy: 0.8535 - val_loss: 0.5599
Epoch 22/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.8396 - loss: 0.5555 - val_accuracy: 0.8655 - val_loss: 0.5327
Epoch 23/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8368 - loss: 0.5369 - val_accuracy: 0.8660 - val_loss: 0.5125
Epoch 24/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8404 - loss: 0.5210 - val_accuracy: 0.8585 - val_loss: 0.5013
Epoch 25/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8482 - loss: 0.5153 - val_accuracy: 0.8535 - val_loss: 0.4860
Epoch 26/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.8547 - loss: 0.4789 - val_accuracy: 0.8665 - val_loss: 0.4602
Epoch 27/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8620 - loss: 0.4593 - val_accuracy: 0.8700 - val_loss: 0.4458
Epoch 28/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8614 - loss: 0.4567 - val_accuracy: 0.8710 - val_loss: 0.4349
Epoch 29/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8649 - loss: 0.4433 - val_accuracy: 0.8710 - val_loss: 0.4268
Epoch 30/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.8709 - loss: 0.4332 - val_accuracy: 0.8740 - val_loss: 0.4163
Epoch 31/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8707 - loss: 0.4164 - val_accuracy: 0.8720 - val_loss: 0.4064
Epoch 32/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.8722 - loss: 0.4027 - val_accuracy: 0.8805 - val_loss: 0.4007
Epoch 33/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.8767 - loss: 0.3934 - val_accuracy: 0.8755 - val_loss: 0.3946
Epoch 34/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8841 - loss: 0.3898 - val_accuracy: 0.8720 - val_loss: 0.3993
Epoch 35/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.8786 - loss: 0.3872 - val_accuracy: 0.8740 - val_loss: 0.3856
Epoch 36/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8826 - loss: 0.3690 - val_accuracy: 0.8805 - val_loss: 0.3820
Epoch 37/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8891 - loss: 0.3645 - val_accuracy: 0.8810 - val_loss: 0.3823
Epoch 38/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.8811 - loss: 0.3649 - val_accuracy: 0.8735 - val_loss: 0.3776
Epoch 39/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8877 - loss: 0.3523 - val_accuracy: 0.8720 - val_loss: 0.3742
Epoch 40/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8911 - loss: 0.3456 - val_accuracy: 0.8810 - val_loss: 0.3730
Epoch 41/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8968 - loss: 0.3352 - val_accuracy: 0.8700 - val_loss: 0.3899
Epoch 42/100
500/500 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.8944 - loss: 0.3318 - val_accuracy: 0.8755 - val_loss: 0.3678

Train and Validation Loss Graphs¶

In [29]:
plt.figure(figsize=(15, 8))
plt.plot(hist.history['loss'], label='Train Loss')
plt.plot(hist.history['val_loss'], label='Validation Loss')
plt.title('Train and Validation Loss Graphs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
Out[29]:
<matplotlib.legend.Legend at 0x2981b779310>
No description has been provided for this image

Preparing the Test Data¶

In [30]:
test['text_tokenized'] = test['text'].apply(tokenization)
test['text_stop'] = test['text_tokenized'].apply(stopwords_remove)
test['text_lemmatized'] = test['text_stop'].apply(lemmatization)
test['text_cleaned'] = test['text_lemmatized'].str.join(' ')

Tokenized_test = tokenizer.texts_to_sequences(test['text_cleaned'])
Padded_test = pad_sequences(Tokenized_test, maxlen=maxlen, padding='pre')

test['label'] = test['label'].replace(label_)

test_evaluate = model.evaluate(Padded_test, test['label'])
63/63 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8779 - loss: 0.3730
In [31]:
test.head()
Out[31]:
text label text_tokenized text_stop text_lemmatized text_cleaned
0 im feeling rather rotten so im not very ambiti... 0 [im, feeling, rather, rotten, so, im, not, ver... [im, feeling, rather, rotten, im, ambitious, r... [im, feel, rather, rotten, im, ambitious, right] im feel rather rotten im ambitious right
1 im updating my blog because i feel shitty 0 [im, updating, my, blog, because, i, feel, shi... [im, updating, blog, feel, shitty] [im, update, blog, feel, shitty] im update blog feel shitty
2 i never make her separate from me because i do... 0 [i, never, make, her, separate, from, me, beca... [never, make, separate, ever, want, feel, like... [never, make, separate, ever, want, feel, like... never make separate ever want feel like ashamed
3 i left with my bouquet of red and yellow tulip... 1 [i, left, with, my, bouquet, of, red, and, yel... [left, bouquet, red, yellow, tulips, arm, feel... [leave, bouquet, red, yellow, tulips, arm, fee... leave bouquet red yellow tulips arm feel sligh...
4 i was feeling a little vain when i did this one 0 [i, was, feeling, a, little, vain, when, i, di... [feeling, little, vain, one] [feel, little, vain, one] feel little vain one

Making Predictions in the Test Data¶

In [32]:
def make_predictions(text_input):
    text_input = str(text_input)
    text_input = tokenization(text_input)
    text_input = stopwords_remove(text_input)
    text_input = lemmatization(text_input)
    text_input = ' '.join(text_input)
    text_input = tokenizer.texts_to_sequences([text_input])
    text_input = pad_sequences(text_input, maxlen=maxlen, padding='pre')
    text_input = np.argmax(model.predict(text_input))

    if text_input == 0:
        print('Predicted Emotion: Sadness')
    elif text_input == 1:
        print('Predicted Emotion: Joy')
    elif text_input == 2:
        print('Predicted Emotion: Love')
    elif text_input == 3:
        print('Predicted Emotion: Anger')
    elif text_input == 4:
        print('Predicted Emotion: Fear')
    else:
        print('Predicted Emotion: Surprise')
    return text_input

label_ = {0: "Sadness", 1: "Joy", 2: "Love", 3: "Anger", 4: "Fear", 5: "Surprise"}
test['label'] = test['label'].replace(label_)

# Randomly chosen Test Dataset data points
i = random.randint(0, len(test) - 1)

print('Test Text:', test['text'][i])
print(' ')
print('Actual Emotion:', test['label'][i])
make_predictions(test['text'][i])
print('-'*50)
print('Test Text:', test['text'][i+1])
print(' ')
print('Actual Emotion:', test['label'][i+1])
make_predictions(test['text'][i+1])
Test Text: i am feeling ok lots of bruising to my arms where they decided to remove blood from me
 
Actual Emotion: Joy
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 86ms/step
Predicted Emotion: Joy
--------------------------------------------------
Test Text: i just don t feel that the others are worthwhile
 
Actual Emotion: Joy
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 50ms/step
Predicted Emotion: Joy
Out[32]:
1

Confusion Matrix of the Test Data¶

In [33]:
from sklearn.metrics import confusion_matrix
import numpy as np

label_ = {"Sadness": 0, "Joy": 1, "Love": 2, "Anger": 3, "Fear": 4, "Surprise": 5}
test['label'] = test['label'].replace(label_)

predictions = model.predict(Padded_test)
pred = np.argmax(predictions, axis=1)

# pred = model.predict_classes(Padded_test)
plt.figure(figsize=(15, 8))
conf_mat = confusion_matrix(test['label'].values, pred)
conf_mat = pd.DataFrame(conf_mat, columns=np.unique(test['label']), index=np.unique(pred))
conf_mat.index.name = 'Actual'
conf_mat.columns.name = 'Predicted'
sns.heatmap(conf_mat, annot=True, fmt='g')
plt.title('Confusion Matrix of the Test Data', fontsize=14)
plt.show()
 1/63 ━━━━━━━━━━━━━━━━━━━━ 5s 91ms/step
C:\Users\Mayank\AppData\Local\Temp\ipykernel_12756\422763479.py:5: FutureWarning:

Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`

63/63 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step
No description has been provided for this image
In [34]:
make_predictions("She’s flying high after the successful product launch.")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step
Predicted Emotion: Joy
Out[34]:
1
In [35]:
make_predictions("I’m going to have the first meeting with a big client tomorrow, and I’m feeling butterflies in my stomach")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 33ms/step
Predicted Emotion: Love
Out[35]:
2
In [36]:
make_predictions("I just asked one question to confirm his request, and my boss bit my head off.")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 42ms/step
Predicted Emotion: Anger
Out[36]:
3
In [37]:
make_predictions('No one told you when to run, you missed the starting gun')
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 43ms/step
Predicted Emotion: Sadness
Out[37]:
0
In [38]:
make_predictions("Sometimes the people who appear to be the most confident are actually afraid of their own shadows.")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 66ms/step
Predicted Emotion: Fear
Out[38]:
4
In [39]:
make_predictions("I'm really impressed that Ashley can speak 7 languages, whereas I only speak one!")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 62ms/step
Predicted Emotion: Surprise
Out[39]:
5
In [ ]: